# Start from an empty global environment so objects left over from an earlier
# session cannot leak into this analysis. Arguments are spelled out in full:
# `T` is an ordinary variable that can be reassigned, and `all` only worked
# through partial matching of `all.names`.
rm(list = ls(all.names = TRUE))
pacman::p_load(readr, dplyr, ggplot2, stringr, maps, Matrix, d3heatmap, plotly, googleVis)
# Saved workspace; the code below reads `X` and its columns `text`, `sub` and
# `date`, so this file is expected to provide that object.
load("data/X.rdata")
# One-off extraction step, kept for reference: it builds a case-insensitive
# alternation of every iso3166 ISO country name, pulls all matches out of each
# document's text, and caches the result so it does not have to be recomputed.
# pat = paste(iso3166$ISOname, collapse="|")
# CX = str_extract_all(X$text, regex(pat, ignore.case=TRUE))
# save(CX, file="data/CX.rdata")
# Cached result of the step above: `CX`, one vector of matched names per document.
load("data/CX.rdata")
# Country names matched more than 9 times across all documents, most frequent
# first.
N9 = unlist(CX) %>% table %>% sort(decreasing = TRUE) %>% {.[. > 9]} %>% names
# For every document, the positions of its matches within N9 (NA for names
# that did not make the cut-off).
C9 = lapply(CX, match, N9)
# Long table of (document index, country index) pairs, built in one vectorised
# step instead of row-binding one small data frame per document. Documents
# without matches contribute no rows.
df = data.frame(
  i = rep(seq_along(C9), lengths(C9)),
  j = as.integer(unlist(C9, use.names = FALSE))
)
# Drop matches for countries outside N9.
df = df[!is.na(df$j), ]
df$x = 1
# Document x country mention counts; sparseMatrix() sums repeated (i, j) pairs.
# The dimensions are stated explicitly from the data: when they are omitted,
# sparseMatrix() infers them from the largest index present, which no longer
# agrees with a fixed-length dimnames vector as soon as the trailing documents
# contain no matches.
# as.data.frame.matrix is called directly, presumably because the input is a
# sparse Matrix object rather than a base matrix.
mx = sparseMatrix(i = df$i, j = df$j, x = df$x,
                  dims = c(length(CX), length(N9)),
                  dimnames = list(seq_along(CX), N9)) %>%
  as.data.frame.matrix
# Subject labels ordered from most to least frequent. `decreasing = TRUE` is
# spelled out rather than relying on partial matching and the reassignable `T`.
z = table(X$sub) %>% sort(decreasing = TRUE) %>% names
# Re-level the subject factor in that order, so tables and splits by subject
# list the most common subjects first.
X$sub = factor(X$sub, levels = z)
# Four-digit year of each document, as a character string.
X$year = format(X$date, "%Y")

# Number of Mentions per Subject/Country

# Rebuild the document x country count matrix from the (i, j, x) triplets in df.
# Dimensions come from the data rather than a fixed row count: without `dims`,
# sparseMatrix() sizes the result from the largest index present, which breaks
# a fixed-length dimnames vector when the trailing documents have no matches.
mx = sparseMatrix(i = df$i, j = df$j, x = df$x,
                  dims = c(length(CX), length(N9)),
                  dimnames = list(seq_along(CX), N9)) %>%
  as.data.frame.matrix
# Country totals within each subject: one column per subject, one row per
# country. vapply() pins the result to a numeric matrix whatever the input.
A = vapply(split(mx, X$sub), colSums, numeric(ncol(mx)))
# Show subjects as rows and the first 12 countries as columns.
t(A)[, 1:12]
##                          Germany Denmark China Netherlands Taiwan France
## Business & Finance           362     208   248         202    231    190
## R&D                          138     148   334          62     45     80
## Grid Connection              296     120    30         112      8     69
## Authorities                  133     113    64          92    105     83
## Technology                    91      74    93          43     11     45
## Operations & Maintenance     119      67    24          60     20     14
## Vessels                       66      41    54          51     24     12
## Training & Education          68      51    11          27     18      5
## Contracts & Tenders           54      33    17          58     65      4
## Environment                   16      12    10          14     18      1
## Ports & Logistics             32      24     3          18     17      7
## Jobs & Recruitment             2       6     3           2      4      0
## Industry Contribution          6       1     4           5     19      2
## Wind Farm Update               1       1     1           1      3      0
##                          Japan Norway Ireland Belgium India United States
## Business & Finance         185    107      70      95    50            34
## R&D                        117     64      70      28   140           102
## Grid Connection             22     95      37      61     4             7
## Authorities                 50     50     157      18    36            65
## Technology                  79     34      28      21    19            14
## Operations & Maintenance     8     32      19      23    16             7
## Vessels                      6     32       3      25     0             6
## Training & Education        10      6      23       0     6             7
## Contracts & Tenders         18      8       0      14     8             3
## Environment                  2      6       6       2    15            10
## Ports & Logistics            4      0      13      12     0             2
## Jobs & Recruitment           0      0       3       0     0             1
## Industry Contribution        5      3       0       1     1             1
## Wind Farm Update             0      0       0       3     0             0
# Interactive heatmap of the subject x country table (first 12 countries).
# The two positional FALSE values are presumably Rowv and Colv, i.e. no
# clustering or reordering of rows and columns -- confirm against the installed
# d3heatmap version. `colors` is spelled out instead of the partial match `col`.
t(A)[, 1:12] %>% as.data.frame %>% d3heatmap(FALSE, FALSE, colors = "Greens")


# Subjects by Country by Year

# For each of the first eight countries in mx, cross-tabulate subject by year
# over the documents that mention the country, keep the first eight subjects,
# and stack the resulting year x subject tables on top of each other.
# data.frame() warns about the duplicated year row names and discards them.
df = do.call(rbind, lapply(names(mx)[1:8], function(country) {
  yrs = 2011:2018
  # Compare years as integers, and use which() so NA comparisons are dropped
  # instead of producing all-NA rows.
  rows = which(mx[[country]] > 0 & as.integer(X$year) %in% yrs)
  # Fixing the year levels guarantees one row per year for every country, even
  # when a country has no mentions in some year; otherwise the stacked table
  # would have fewer rows than the year labels attached to it afterwards.
  d = data.frame(sub = X$sub[rows], year = factor(X$year[rows], levels = yrs))
  xtabs(~ sub + year, d)[1:8, ] %>% t
})) %>% data.frame
## Warning in data.row.names(row.names, rowsi, i): some row.names duplicated:
## 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
## --> row.names NOT used
# Attach country and year labels to the stacked counts and add a row total.
# Assumes df holds eight consecutive rows per country, in year order 2011-2018,
# for the first eight countries of mx.
df = local({
  top_countries = names(mx)[1:8]
  labelled = data.frame(
    country = factor(rep(top_countries, each = 8), levels = top_countries),
    year = rep(2011:2018, times = 8),
    df
  )
  # Columns 3 to 10 are the eight subject counts that follow country and year.
  labelled[["Total"]] = rowSums(labelled[, 3:10])
  labelled
})
# Interactive Line Plot
# Yearly counts in the R.D column, one coloured line per country, converted to
# an interactive plotly widget.
{
  ggplot(df, aes(x = year, y = R.D, colour = country)) +
    geom_line(lwd = 1) +
    ggtitle("No. Mention in R&D Sub-Category, Top 8 Countries")
} %>%
  ggplotly()


# Fully Interactive Motion Bubble Plot
# Set the googleVis plot tag to "chart", keeping the previous option values in
# `op`. NOTE(review): presumably this makes plot() emit only the chart's HTML
# for embedding in a rendered report -- confirm against the googleVis docs.
op = options(gvis.plot.tag = "chart")
# Motion chart over df, with country as the entity id and year as the time axis.
plot(
  gvisMotionChart(
    data = df,
    idvar = "country",
    timevar = "year",
    options = list(width = 800, height = 600)
  )
)